#delimit;
clear all;
set more off;
set logtype text;
capture log close regressions_collapsed_rf;

** REPLACE FILE PATH WITH PATH TO RELEVANT REPLICATION FILES;
local fileloc = "~/KMS_REPLICATION";
pause on;
set maxvar 30000;
set matsize 10000;

log using `fileloc'/log_files/regressions_collapsed_rf.txt, replace name(regressions_collapsed_rf);

** Specify basic appearance of graphs using global;
global graph_options "graphregion(fcolor(white) color(white) icolor(white)) plotregion()";


** NOTE: do-file calls other do-files at various points, indicating both the path specified in the local `fileloc' and the data file to be called when using subgroup analysis;

**XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX;
**XXXXXXXXXX PROGRAMS, LOCALS, AND GLOBALS REFERENCED XXXXXXXXXXXXXXX;
**XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX;

** Program to calculate marginal effects by subgroup. Will use this after subgroup regressions;
cap program drop margin_splits;
program define margin_splits;
	margins, dydx(`1') over(`2') continuous noestimcheck post vsquish;
	eststo;
	lincom _b[1.`2'] - _b[0.`2'];
	estadd scalar test = r(estimate);
	estadd scalar se = r(se); 
end;

** Get standard deviations of pollution variables for potential calculations later;
use `fileloc'/data/weekly_pollution_weather_traffic_KMS, clear;
xtset mother_zip;
foreach var in weekly_co weekly_pm10 {;
	xtsum `var';
	global `var'_within_sd = r(sd_w);
	di $`var'_within_sd;
	global `var'_between_sd = r(sd_b);
	di $`var'_between_sd;
};

** Certain weather cutoffs by percentile – used for later weather condition marginal calculations;
foreach weather in max_temp windspeed rain humidS days_with_fog {;
		sum `weather', d;
		local one_`weather' = r(p5);
		local two_`weather' = r(p10);
		local three_`weather' = r(p25);
		local four_`weather' = r(p50);
		local five_`weather' = r(p75);
		local six_`weather' = r(p90);
		local seven_`weather' = r(p95);
};

** Run regression prep;
** These do-files prepare the data for all regressions, calculate standard deviations, and define globals;
do `fileloc'/all_regression_prep_hazard_collapsed.do `fileloc' _all;


**XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX;
**XXXXXXXXXXXXXXX REDUCED FORM AND FIRST STAGE XXXXXXXXXXXXXXXXXXXXXX;
**XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX;
eststo clear;

** Note all regressions use frequency weights with collapsed data. See text;

** Make coefficients easier to read by reducing number of preceding zeros. All in text calculations adjust accordingly;
replace died = died * 1000;

*** EFFECTS BY DISTANCE;
** Various fixed effects;
egen zip_rmonth = group(mother_zip event_month) /*mother zip code by month of observation*/;
egen month_by_year = group(event_month event_year) /*month of observation by year of observation*/;

xi i.birth_quarter i.month_by_year /*fixed effects for quarter of birth and month-by-year effects generated above*/;

**XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX;
** TABLE 4; 
**XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX;

** Loop performs regressions on reduced form and first stage for CO and PM10;
** c. prefix allows for interaction of continuous variable in calculation of marginal effects post-regression;
** # indicates interactions. This format allows the margins command to deal with quadratic effects and interactions;
foreach outcome in died weekly_co weekly_pm10  {;

	areg `outcome' 

	_I*

	c.flow_by_length_5
	c.flow_by_length_5#(

	c.max_temp
	c.humidS
	c.windspeed
	c.rain
	c.days_with_rain
	c.days_with_fog 

	c.max_temp#c.max_temp
	c.humidS#c.humidS
	c.windspeed#c.windspeed
	c.rain#c.rain

	c.max_temp#c.max_temp#c.max_temp
	c.humidS#c.humidS#c.humidS
	c.windspeed#c.windspeed#c.windspeed
	c.rain#c.rain#c.rain
	
	)

	c.flow_by_length_10
	c.flow_by_length_10#(

	c.max_temp
	c.humidS
	c.windspeed
	c.rain
	c.days_with_rain
	c.days_with_fog 

	c.max_temp#c.max_temp
	c.humidS#c.humidS
	c.windspeed#c.windspeed
	c.rain#c.rain

	c.max_temp#c.max_temp#c.max_temp
	c.humidS#c.humidS#c.humidS
	c.windspeed#c.windspeed#c.windspeed
	c.rain#c.rain#c.rain
	
	)
	
	c.flow_by_length_15
	c.flow_by_length_15#(

	c.max_temp
	c.humidS
	c.windspeed
	c.rain
	c.days_with_rain
	c.days_with_fog 

	c.max_temp#c.max_temp
	c.humidS#c.humidS
	c.windspeed#c.windspeed
	c.rain#c.rain

	c.max_temp#c.max_temp#c.max_temp
	c.humidS#c.humidS#c.humidS
	c.windspeed#c.windspeed#c.windspeed
	c.rain#c.rain#c.rain
	
	)

	c.flow_by_length_20
	c.flow_by_length_20#(

	c.max_temp
	c.humidS
	c.windspeed
	c.rain
	c.days_with_rain
	c.days_with_fog 

	c.max_temp#c.max_temp
	c.humidS#c.humidS
	c.windspeed#c.windspeed
	c.rain#c.rain

	c.max_temp#c.max_temp#c.max_temp
	c.humidS#c.humidS#c.humidS
	c.windspeed#c.windspeed#c.windspeed
	c.rain#c.rain#c.rain

	)
	
	$triweekly_co $triweekly_pm10
		
	$controls 
	$agespline [fw = weight], absorb(zip_rmonth) cluster(mother_zip);

	di "BASIC `outcome'";
		
	margins, dydx(flow_by_length_5 flow_by_length_10 flow_by_length_15 flow_by_length_20) continuous noestimcheck post vsquish;
	eststo;

};

esttab using `fileloc'/regs/reduced_form_by_miles.xls, 
	replace
	tab
	mlabels(, title)
	se
	plain
	cells(b(fmt(%9.4f) star) se(fmt(%9.4f))) 
	starlevels(\text{$^{\ast}$} 0.10 \text{$^{\ast\ast}$} 0.05 \text{$^{\ast\ast\ast}$} 0.01);	

eststo clear;

**XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX;
** TABLE 5 AND FIGURES 1, 2, 3, AND 4; 
**XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX;


** Keep only those with at least one traffic sensor within 15 miles, to correspond to by-distance results in Table 4. See text;
egen totaltotflowbase = total(tot_flow_base), by(mother_zip);
drop if totaltotflowbase == 0;


**** EFFECTS OVERALL AND BY WEATHER;
** Interactions operate as notes for table 4 describe above;
foreach outcome in weekly_co weekly_pm10 died {;

	eststo clear;

	areg `outcome' 

	_I*
	
	c.tot_flow_base##(

	c.max_temp
	c.humidS
	c.windspeed
	c.rain
	c.days_with_rain
	c.days_with_fog 

	c.max_temp#c.max_temp
	c.humidS#c.humidS
	c.windspeed#c.windspeed
	c.rain#c.rain

	c.max_temp#c.max_temp#c.max_temp
	c.humidS#c.humidS#c.humidS
	c.windspeed#c.windspeed#c.windspeed
	c.rain#c.rain#c.rain
	
	)

	$triweekly_co $triweekly_pm10

	$controls 
	$agespline [fw = weight], absorb(zip_rmonth) cluster(mother_zip);

	di "Weather `outcome'";

	** Density graph value;
	gen d`outcome'_dflow =
	_b[tot_flow_base]
+ _b[c.tot_flow_base#c.max_temp]*c.max_temp
+ _b[c.tot_flow_base#c.max_temp#c.max_temp]*c.max_temp#c.max_temp 
+ _b[c.tot_flow_base#c.max_temp#c.max_temp#c.max_temp]*c.max_temp#c.max_temp#c.max_temp 
+ _b[c.tot_flow_base#c.humidS]*c.humidS
+ _b[c.tot_flow_base#c.humidS#c.humidS]*c.humidS#c.humidS
+ _b[c.tot_flow_base#c.humidS#c.humidS#c.humidS]*c.humidS#c.humidS#c.humidS
+ _b[c.tot_flow_base#c.rain]*c.rain
+ _b[c.tot_flow_base#c.rain#c.rain]*c.rain#c.rain
+ _b[c.tot_flow_base#c.rain#c.rain#c.rain]*c.rain#c.rain#c.rain
+ _b[c.tot_flow_base#c.windspeed]*c.windspeed
+ _b[c.tot_flow_base#c.windspeed#c.windspeed]*c.windspeed#c.windspeed
+ _b[c.tot_flow_base#c.windspeed#c.windspeed#c.windspeed]*c.windspeed#c.windspeed#c.windspeed
+ _b[c.tot_flow_base#c.days_with_rain]*c.days_with_rain
+ _b[c.tot_flow_base#c.days_with_fog]*c.days_with_fog;

	eststo weather_regs;

	margins, dydx(tot_flow_base) continuous noestimcheck post vsquish;
	eststo;
	estimates restore weather_regs;
	
	** Temp, humidity, windspeed, and rain;
	foreach weather in max_temp humidS windspeed rain {;
		margins, dydx(tot_flow_base) at(`weather' = (`three_`weather'' `five_`weather'')) continuous noestimcheck post vsquish;
		eststo;
		lincom _b[1._at] - _b[2._at];
		estadd scalar test = r(estimate);
		estadd scalar se = r(se); 
		estimates restore weather_regs;		
	};

	margins, dydx(tot_flow_base) at(days_with_fog = (0 3)) continuous noestimcheck post vsquish;
		eststo;
		lincom _b[1._at] - _b[2._at];
		estadd scalar test = r(estimate);
		estadd scalar se = r(se); 
		estimates restore weather_regs;		

	eststo drop weather_regs;

	esttab using `fileloc'/regs/reduced_form_weather_`outcome'.xls, 
		replace
		tab
		mlabels(, title)
		se
		plain
		cells(b(fmt(%9.4f) star) se(fmt(%9.4f))) 
		starlevels(\text{$^{\ast}$} 0.10 \text{$^{\ast\ast}$} 0.05 \text{$^{\ast\ast\ast}$} 0.01)		
		stats(test se, label("Diff" "Std. Error of Diff") fmt(%9.6fc));	

	** GRAPHS IN FIGURES 1, 2, 3, AND 4;
	** DENSITY GRAPHS;	
	sum `outcome', d;
	local highcut = r(p90);
	local lowcut = r(p10);	
	
	sum d`outcome'_dflow [fw = weight];

	local mean = r(mean);

	kdensity d`outcome'_dflow [fw = weight], n(200) generate(`outcome'_x 		`outcome'_density);
	local bw = r(bwidth);
	
	twoway line `outcome'_density `outcome'_x if `outcome' >= `lowcut' & `outcome' <= `highcut',
		$graph_options
		lcolor(black)
		xline(`mean', lpattern(dash) lcolor(black))
		xtitle("Derivative with Respect to Flow")
		ytitle("Density");
	graph export `fileloc'/graphs/`outcome'_density.eps, replace;

	** BY RAIN;
	twoway (kdensity d`outcome'_dflow if rain<=`three_rain' [fw = weight],  lpattern(solid) lcolor(black))
	(kdensity d`outcome'_dflow if rain>=`five_rain' [fw = weight],  lpattern(dash) lcolor(black)), 
		title("")
		ytitle("Density") xtitle("Derivative with Respect to Flow") 
		legend(label(1 "Low Rain") label(2 "High Rain"))
		$graph_options;	
	graph export "`fileloc'/graphs/`outcome'_rain.eps", replace;

	** BY WINDSPEED;
	twoway(kdensity d`outcome'_dflow if windspeed <=`three_windspeed' [fw = weight],  lpattern(solid) lcolor(black))
	(kdensity d`outcome'_dflow if windspeed >= `five_windspeed' [fw = weight],  lpattern(dash) lcolor(black)), 
		title("")
		ytitle("Density") xtitle("Derivative with Respect to Flow") 
		legend(label(1 "Low Wind") label(2 "High Wind"))
		$graph_options;
	graph export "`fileloc'/graphs/`outcome'_wind.eps", replace;

	** BY HUMIDITY;
	twoway(kdensity d`outcome'_dflow if humidS <= `three_humidS' [fw = weight],  lpattern(solid) lcolor(black))
	(kdensity d`outcome'_dflow if humidS>=`five_humidS' [fw = weight],  lpattern(dash) lcolor(black)), 
		title("")	
		ytitle("Density") xtitle("Derivative with Respect to Flow") 
		legend(label(1 "Low Humidity") label(2 "High Humidity"))
		$graph_options;
	graph export "`fileloc'/graphs/`outcome'_humid.eps", replace;

	** BY TEMPERATURE;
	twoway(kdensity d`outcome'_dflow if max_temp <=`three_max_temp' [fw = weight],  lpattern(solid) lcolor(black))
	(kdensity d`outcome'_dflow if max_temp >= `five_max_temp' [fw = weight], lpattern(dash) lcolor(black)), 
		title("")
		ytitle("Density") xtitle("Derivative with Respect to Flow") 
		legend(label(1 "Low Temp") label(2 "High Temp"))
		$graph_options;
	graph export "`fileloc'/graphs/`outcome'_max_temp.eps", replace;
		
};


eststo clear;

**XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX;
** TABLE 6 – Panel B (within 15 miles); 
**XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX;

**** SUBGROUPS;
** Blacks;
** Run regression prep;
qui do `fileloc'/all_regression_prep_hazard_collapsed.do `fileloc' _black;

** Make coefficients easier to read;
replace died = died * 1000;

** Keep only those with at least one traffic sensor within 15 miles, to correspond to by-distance results in Table 4. See text;
egen totaltotflowbase = total(tot_flow_base), by(mother_zip);
drop if totaltotflowbase == 0;

egen zip_rmonth = group(mother_zip event_month);

egen month_by_year = group(event_month event_year);

xi i.birth_quarter i.month_by_year;

** Subgroup analysis includes interactions for indicators of each subgroup category: black, hispanic, graduation status, medicaid, low birthweight, and maternal education;
** i. prefix tells margins command that interacted variables are indicators, not continuous;
qui areg died 

_I*

i.black##c.tot_flow_base##(

	c.max_temp
	c.humidS
	c.windspeed
	c.rain
	c.days_with_rain
	c.days_with_fog 

	c.max_temp#c.max_temp
	c.humidS#c.humidS
	c.windspeed#c.windspeed
	c.rain#c.rain

	c.max_temp#c.max_temp#c.max_temp
	c.humidS#c.humidS#c.humidS
	c.windspeed#c.windspeed#c.windspeed
	c.rain#c.rain#c.rain

	)
	
	$triweekly_co $triweekly_pm10 

male asian other_race hisp HS_grad college_grad twins trip_or_more age19_25 age26_30 age31_35 age36up medicaid care_first_tri low_weight premature second_born third_born fourth_or_more

$agespline [fw = weight], absorb(zip_rmonth) cluster(mother_zip);

di "black";

margin_splits tot_flow_base black;


** Hispanics;
** Run regression prep;
qui do `fileloc'/all_regression_prep_hazard_collapsed.do `fileloc' _hisp;

** Make coefficients easier to read;
replace died = died * 1000;

** KEEP ONLY THOSE WITH AT LEAST ONE TRAFFIC SENSOR IN 15 MILES;
egen totaltotflowbase = total(tot_flow_base), by(mother_zip);
drop if totaltotflowbase == 0;


egen zip_rmonth = group(mother_zip event_month);

egen month_by_year = group(event_month event_year);

xi i.birth_quarter i.month_by_year;

qui areg died 

_I*

i.hisp##c.tot_flow_base##(

	c.max_temp
	c.humidS
	c.windspeed
	c.rain
	c.days_with_rain
	c.days_with_fog 

	c.max_temp#c.max_temp
	c.humidS#c.humidS
	c.windspeed#c.windspeed
	c.rain#c.rain

	c.max_temp#c.max_temp#c.max_temp
	c.humidS#c.humidS#c.humidS
	c.windspeed#c.windspeed#c.windspeed
	c.rain#c.rain#c.rain

	)
	
$triweekly_co $triweekly_pm10 

male black asian other_race HS_grad college_grad twins trip_or_more age19_25 age26_30 age31_35 age36up medicaid care_first_tri low_weight premature second_born third_born fourth_or_more

$agespline [fw = weight], absorb(zip_rmonth) cluster(mother_zip);

di "hisp";

margin_splits tot_flow_base hisp;


** Medicaid;
** Run regression prep;
qui do `fileloc'/all_regression_prep_hazard_collapsed.do `fileloc' _medicaid;

** Make coefficients easier to read;
replace died = died * 1000;

** KEEP ONLY THOSE WITH AT LEAST ONE TRAFFIC SENSOR IN 15 MILES;
egen totaltotflowbase = total(tot_flow_base), by(mother_zip);
drop if totaltotflowbase == 0;

egen zip_rmonth = group(mother_zip event_month);

egen month_by_year = group(event_month event_year);

xi i.birth_quarter i.month_by_year;

qui areg died 

_I*

i.medicaid##c.tot_flow_base##(

	c.max_temp
	c.humidS
	c.windspeed
	c.rain
	c.days_with_rain
	c.days_with_fog 

	c.max_temp#c.max_temp
	c.humidS#c.humidS
	c.windspeed#c.windspeed
	c.rain#c.rain

	c.max_temp#c.max_temp#c.max_temp
	c.humidS#c.humidS#c.humidS
	c.windspeed#c.windspeed#c.windspeed
	c.rain#c.rain#c.rain

	)

$triweekly_co $triweekly_pm10 

male black asian other_race hisp HS_grad college_grad twins trip_or_more age19_25 age26_30 age31_35 age36up care_first_tri low_weight premature second_born third_born fourth_or_more

$agespline [fw = weight], absorb(zip_rmonth) cluster(mother_zip);

di "medicaid";

margin_splits tot_flow_base medicaid;


** HS Dropout;
** Run regression prep;
qui do `fileloc'/all_regression_prep_hazard_collapsed.do `fileloc' _HS_grad;


** Make coefficients easier to read;
replace died = died * 1000;

** KEEP ONLY THOSE WITH AT LEAST ONE TRAFFIC SENSOR IN 15 MILES;
egen totaltotflowbase = total(tot_flow_base), by(mother_zip);
drop if totaltotflowbase == 0;

egen zip_rmonth = group(mother_zip event_month);

egen month_by_year = group(event_month event_year);

xi i.birth_quarter i.month_by_year;

qui areg died 

_I*

i.HS_grad##c.tot_flow_base##(

	c.max_temp
	c.humidS
	c.windspeed
	c.rain
	c.days_with_rain
	c.days_with_fog 

	c.max_temp#c.max_temp
	c.humidS#c.humidS
	c.windspeed#c.windspeed
	c.rain#c.rain

	c.max_temp#c.max_temp#c.max_temp
	c.humidS#c.humidS#c.humidS
	c.windspeed#c.windspeed#c.windspeed
	c.rain#c.rain#c.rain

	)

$triweekly_co $triweekly_pm10 

male black asian other_race hisp college_grad twins trip_or_more age19_25 age26_30 age31_35 age36up medicaid care_first_tri low_weight premature second_born third_born fourth_or_more

$agespline [fw = weight], absorb(zip_rmonth) cluster(mother_zip);

di "HS_grad";

margin_splits tot_flow_base HS_grad;



** Premature;
** Run regression prep;
qui do `fileloc'/all_regression_prep_hazard_collapsed.do `fileloc' _premature;

** Make coefficients easier to read;
replace died = died * 1000;

** KEEP ONLY THOSE WITH AT LEAST ONE TRAFFIC SENSOR IN 15 MILES;
egen totaltotflowbase = total(tot_flow_base), by(mother_zip);
drop if totaltotflowbase == 0;

egen zip_rmonth = group(mother_zip event_month);

egen month_by_year = group(event_month event_year);

xi i.birth_quarter i.month_by_year;

qui areg died 

_I*

i.premature##c.tot_flow_base##(

	c.max_temp
	c.humidS
	c.windspeed
	c.rain
	c.days_with_rain
	c.days_with_fog 

	c.max_temp#c.max_temp
	c.humidS#c.humidS
	c.windspeed#c.windspeed
	c.rain#c.rain

	c.max_temp#c.max_temp#c.max_temp
	c.humidS#c.humidS#c.humidS
	c.windspeed#c.windspeed#c.windspeed
	c.rain#c.rain#c.rain

	)

$triweekly_co $triweekly_pm10 

male black asian other_race hisp HS_grad college_grad twins trip_or_more age19_25 age26_30 age31_35 age36up medicaid care_first_tri low_weight second_born third_born fourth_or_more

$agespline [fw = weight], absorb(zip_rmonth) cluster(mother_zip);

di "premature";

margin_splits tot_flow_base premature;



** Low BW;
** Run regression prep;
qui do `fileloc'/all_regression_prep_hazard_collapsed.do `fileloc' _low_weight;

** Make coefficients easier to read;
replace died = died * 1000;

** KEEP ONLY THOSE WITH AT LEAST ONE TRAFFIC SENSOR IN 15 MILES;
egen totaltotflowbase = total(tot_flow_base), by(mother_zip);
drop if totaltotflowbase == 0;

egen zip_rmonth = group(mother_zip event_month);

egen month_by_year = group(event_month event_year);

xi i.birth_quarter i.month_by_year;

qui areg died 

_I*

i.low_weight##c.tot_flow_base##(

	c.max_temp
	c.humidS
	c.windspeed
	c.rain
	c.days_with_rain
	c.days_with_fog 

	c.max_temp#c.max_temp
	c.humidS#c.humidS
	c.windspeed#c.windspeed
	c.rain#c.rain

	c.max_temp#c.max_temp#c.max_temp
	c.humidS#c.humidS#c.humidS
	c.windspeed#c.windspeed#c.windspeed
	c.rain#c.rain#c.rain

	)

$triweekly_co $triweekly_pm10 

male black asian other_race hisp HS_grad college_grad twins trip_or_more age19_25 age26_30 age31_35 age36up medicaid care_first_tri premature second_born third_born fourth_or_more

$agespline [fw = weight], absorb(zip_rmonth) cluster(mother_zip);

di "low_weight";

margin_splits tot_flow_base low_weight;


esttab using `fileloc'/regs/reduced_form_groups_flowmeasure.xls, 
	replace
	tab
	mlabels(, title)
	se
	plain
	cells(b(fmt(%9.4f) star) se(fmt(%9.4f))) 
	starlevels(\text{$^{\ast}$} 0.10 \text{$^{\ast\ast}$} 0.05 \text{$^{\ast\ast\ast}$} 0.01)
	stats(test se, label("Diff" "Std. Error of Diff") fmt(%9.6fc));	


**XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX;
** TABLE 6 – Panel A (within 5 miles); 
**XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX;
** ONLY WITHIN 5 MILES;


** Run regression prep;
qui do `fileloc'/all_regression_prep_hazard_collapsed.do `fileloc' _all;

**XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX;
**XXXXXXXXXXXXXXXXXXXXXXXXXXX REDUCED FORM XXXXXXXXXXXXXXXXXXXXXXXXXX;
**XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX;
eststo clear;

** Make coefficients easier to read;
replace died = died * 1000;

*** EFFECTS BY DISTANCE;

egen zip_rmonth = group(mother_zip event_month);

egen month_by_year = group(event_month event_year);

xi i.birth_quarter i.month_by_year;

*** Use only traffic within 5 miles;
egen totaltotflowbase = total(flow_by_length_5), by(mother_zip);
drop if totaltotflowbase == 0;


**** EFFECTS OVERALL AND BY WEATHER;
foreach outcome in died {;

	eststo clear;

	qui areg `outcome' 

	_I*
	
	c.flow_by_length_5##(

	c.max_temp
	c.humidS
	c.windspeed
	c.rain
	c.days_with_rain
	c.days_with_fog 

	c.max_temp#c.max_temp
	c.humidS#c.humidS
	c.windspeed#c.windspeed
	c.rain#c.rain

	c.max_temp#c.max_temp#c.max_temp
	c.humidS#c.humidS#c.humidS
	c.windspeed#c.windspeed#c.windspeed
	c.rain#c.rain#c.rain
	
	)

	$triweekly_co $triweekly_pm10

	$controls 
	$agespline [fw = weight], absorb(zip_rmonth) cluster(mother_zip);

	di "Weather `outcome'";

	eststo weather_regs;

	margins, dydx(flow_by_length_5) continuous noestimcheck post vsquish;
	eststo;
	estimates restore weather_regs;
	
	** Temp, humidity, windspeed, and rain;
	foreach weather in max_temp humidS windspeed rain {;
		margins, dydx(flow_by_length_5) at(`weather' = (`three_`weather'' `five_`weather'')) continuous noestimcheck post vsquish;
		eststo;
		lincom _b[1._at] - _b[2._at];
		estadd scalar test = r(estimate);
		estadd scalar se = r(se); 
		estimates restore weather_regs;		
	};

	margins, dydx(flow_by_length_5) at(days_with_fog = (0 3)) continuous noestimcheck post vsquish;
		eststo;
		lincom _b[1._at] - _b[2._at];
		estadd scalar test = r(estimate);
		estadd scalar se = r(se); 
		estimates restore weather_regs;		

	eststo drop weather_regs;
	
	esttab using `fileloc'/regs/reduced_form_weather_`outcome'_5miles.xls, 
		replace
		tab
		mlabels(, title)
		se
		plain
		cells(b(fmt(%9.4f) star) se(fmt(%9.4f))) 
		starlevels(\text{$^{\ast}$} 0.10 \text{$^{\ast\ast}$} 0.05 \text{$^{\ast\ast\ast}$} 0.01)		
		stats(test se, label("Diff" "Std. Error of Diff") fmt(%9.6fc));	
		
};





eststo clear;

**** SUBGROUPS;
** Blacks;
** Run regression prep;
qui do `fileloc'/all_regression_prep_hazard_collapsed.do `fileloc' _black;

** Make coefficients easier to read;
replace died = died * 1000;

*** Use only traffic within 5 miles;
egen totaltotflowbase = total(flow_by_length_5), by(mother_zip);
drop if totaltotflowbase == 0;

egen zip_rmonth = group(mother_zip event_month);

egen month_by_year = group(event_month event_year);

xi i.birth_quarter i.month_by_year;

foreach weather in max_temp windspeed humidS {;
	gen double `weather'sq = `weather'^2;
	gen double `weather'cu = `weather'^3/1000;
	gen float `weather'quart = `weather'^4/10000;
	gen float `weather'quint = `weather'^5/100000;
};

foreach weather in rain days_with_rain days_with_fog {;
	gen double `weather'sq = `weather'^2;
	gen double `weather'cu = `weather'^3;
	gen float `weather'quart = `weather'^4;
	gen float `weather'quint = `weather'^5;
};

** Instrumentation involves both total flow AND weather interactions – this code generates said interactions;
foreach weathervar in $weather_linear $weather_quadratic $weather_cubic {;
	gen double tot_flowIV_`weathervar' = tot_flow_base*`weathervar';
	gen double flowIV_5_`weathervar'=(flow_by_length_5)*`weathervar';
	gen double flowIV_10_`weathervar'=(flow_by_length_10)*`weathervar';
	gen double flowIV_15_`weathervar'=(flow_by_length_15)*`weathervar';
	gen double flowIV_20_`weathervar'=(flow_by_length_20)*`weathervar';
	format flowIV_*`weathervar' %16.9g;	
};

qui areg died 

_I*

i.black##c.flow_by_length_5##(

	c.max_temp
	c.humidS
	c.windspeed
	c.rain
	c.days_with_rain
	c.days_with_fog 

	c.max_tempsq
	c.humidSsq
	c.windspeedsq
	c.rainsq

	c.max_tempcu
	c.humidScu
	c.windspeedcu
	c.raincu

	)
	
	$triweekly_co $triweekly_pm10 

male asian other_race hisp HS_grad college_grad twins trip_or_more age19_25 age26_30 age31_35 age36up medicaid care_first_tri low_weight premature second_born third_born fourth_or_more

$agespline [fw = weight], absorb(zip_rmonth) cluster(mother_zip);

di "black";

local zips = e(N_clust);
estadd scalar zips = `zips';

margin_splits flow_by_length_5 black;


** Hispanics;
** Run regression prep;
qui do `fileloc'/all_regression_prep_hazard_collapsed.do `fileloc' _hisp;

** Make coefficients easier to read;
replace died = died * 1000;

*** Use only traffic within 5 miles;
egen totaltotflowbase = total(flow_by_length_5), by(mother_zip);
drop if totaltotflowbase == 0;


egen zip_rmonth = group(mother_zip event_month);

egen month_by_year = group(event_month event_year);

xi i.birth_quarter i.month_by_year;

foreach weather in max_temp windspeed humidS {;
	gen double `weather'sq = `weather'^2;
	gen double `weather'cu = `weather'^3/1000;
	gen float `weather'quart = `weather'^4/10000;
	gen float `weather'quint = `weather'^5/100000;
};

foreach weather in rain days_with_rain days_with_fog {;
	gen double `weather'sq = `weather'^2;
	gen double `weather'cu = `weather'^3;
	gen float `weather'quart = `weather'^4;
	gen float `weather'quint = `weather'^5;
};

** Instrumentation involves both total flow AND weather interactions – this code generates said interactions;
foreach weathervar in $weather_linear $weather_quadratic $weather_cubic {;
	gen double tot_flowIV_`weathervar' = tot_flow_base*`weathervar';
	gen double flowIV_5_`weathervar'=(flow_by_length_5)*`weathervar';
	gen double flowIV_10_`weathervar'=(flow_by_length_10)*`weathervar';
	gen double flowIV_15_`weathervar'=(flow_by_length_15)*`weathervar';
	gen double flowIV_20_`weathervar'=(flow_by_length_20)*`weathervar';
	format flowIV_*`weathervar' %16.9g;	
};

qui areg died 

_I*

i.hisp##c.flow_by_length_5##(

	c.max_temp
	c.humidS
	c.windspeed
	c.rain
	c.days_with_rain
	c.days_with_fog 

	c.max_tempsq
	c.humidSsq
	c.windspeedsq
	c.rainsq

	c.max_tempcu
	c.humidScu
	c.windspeedcu
	c.raincu

	)
	
$triweekly_co $triweekly_pm10 

male black asian other_race HS_grad college_grad twins trip_or_more age19_25 age26_30 age31_35 age36up medicaid care_first_tri low_weight premature second_born third_born fourth_or_more

$agespline [fw = weight], absorb(zip_rmonth) cluster(mother_zip);

di "hisp";

margin_splits flow_by_length_5 hisp;


** Medicaid;
** Run regression prep;
qui do `fileloc'/all_regression_prep_hazard_collapsed.do `fileloc' _medicaid;

** Make coefficients easier to read;
replace died = died * 1000;

*** Use only traffic within 5 miles;
egen totaltotflowbase = total(flow_by_length_5), by(mother_zip);
drop if totaltotflowbase == 0;

egen zip_rmonth = group(mother_zip event_month);

egen month_by_year = group(event_month event_year);

xi i.birth_quarter i.month_by_year;

foreach weather in max_temp windspeed humidS {;
	gen double `weather'sq = `weather'^2;
	gen double `weather'cu = `weather'^3/1000;
	gen float `weather'quart = `weather'^4/10000;
	gen float `weather'quint = `weather'^5/100000;
};

foreach weather in rain days_with_rain days_with_fog {;
	gen double `weather'sq = `weather'^2;
	gen double `weather'cu = `weather'^3;
	gen float `weather'quart = `weather'^4;
	gen float `weather'quint = `weather'^5;
};

** Instrumentation involves both total flow AND weather interactions – this code generates said interactions;
foreach weathervar in $weather_linear $weather_quadratic $weather_cubic {;
	gen double tot_flowIV_`weathervar' = tot_flow_base*`weathervar';
	gen double flowIV_5_`weathervar'=(flow_by_length_5)*`weathervar';
	gen double flowIV_10_`weathervar'=(flow_by_length_10)*`weathervar';
	gen double flowIV_15_`weathervar'=(flow_by_length_15)*`weathervar';
	gen double flowIV_20_`weathervar'=(flow_by_length_20)*`weathervar';
	format flowIV_*`weathervar' %16.9g;	
};

qui areg died 

_I*

i.medicaid##c.flow_by_length_5##(

	c.max_temp
	c.humidS
	c.windspeed
	c.rain
	c.days_with_rain
	c.days_with_fog 

	c.max_tempsq
	c.humidSsq
	c.windspeedsq
	c.rainsq

	c.max_tempcu
	c.humidScu
	c.windspeedcu
	c.raincu

	)

$triweekly_co $triweekly_pm10 

male black asian other_race hisp HS_grad college_grad twins trip_or_more age19_25 age26_30 age31_35 age36up care_first_tri low_weight premature second_born third_born fourth_or_more

$agespline [fw = weight], absorb(zip_rmonth) cluster(mother_zip);

di "medicaid";

margin_splits flow_by_length_5 medicaid;


** HS Dropout;
** Run regression prep;
qui do `fileloc'/all_regression_prep_hazard_collapsed.do `fileloc' _HS_grad;

** Make coefficients easier to read;
replace died = died * 1000;

*** Use only traffic within 5 miles;
egen totaltotflowbase = total(flow_by_length_5), by(mother_zip);
drop if totaltotflowbase == 0;

egen zip_rmonth = group(mother_zip event_month);

egen month_by_year = group(event_month event_year);

xi i.birth_quarter i.month_by_year;

foreach weather in max_temp windspeed humidS {;
	gen double `weather'sq = `weather'^2;
	gen double `weather'cu = `weather'^3/1000;
	gen float `weather'quart = `weather'^4/10000;
	gen float `weather'quint = `weather'^5/100000;
};

foreach weather in rain days_with_rain days_with_fog {;
	gen double `weather'sq = `weather'^2;
	gen double `weather'cu = `weather'^3;
	gen float `weather'quart = `weather'^4;
	gen float `weather'quint = `weather'^5;
};

** Instrumentation involves both total flow AND weather interactions – this code generates said interactions;
foreach weathervar in $weather_linear $weather_quadratic $weather_cubic {;
	gen double tot_flowIV_`weathervar' = tot_flow_base*`weathervar';
	gen double flowIV_5_`weathervar'=(flow_by_length_5)*`weathervar';
	gen double flowIV_10_`weathervar'=(flow_by_length_10)*`weathervar';
	gen double flowIV_15_`weathervar'=(flow_by_length_15)*`weathervar';
	gen double flowIV_20_`weathervar'=(flow_by_length_20)*`weathervar';
	format flowIV_*`weathervar' %16.9g;	
};

qui areg died 

_I*

i.HS_grad##c.flow_by_length_5##(

	c.max_temp
	c.humidS
	c.windspeed
	c.rain
	c.days_with_rain
	c.days_with_fog 

	c.max_tempsq
	c.humidSsq
	c.windspeedsq
	c.rainsq

	c.max_tempcu
	c.humidScu
	c.windspeedcu
	c.raincu

	)

$triweekly_co $triweekly_pm10 

male black asian other_race hisp college_grad twins trip_or_more age19_25 age26_30 age31_35 age36up medicaid care_first_tri low_weight premature second_born third_born fourth_or_more

$agespline [fw = weight], absorb(zip_rmonth) cluster(mother_zip);

di "HS_grad";

margin_splits flow_by_length_5 HS_grad;



** Premature;
** Run regression prep;
qui do `fileloc'/all_regression_prep_hazard_collapsed.do `fileloc' _premature;

** Make coefficients easier to read;
replace died = died * 1000;

*** Use only traffic within 5 miles;
egen totaltotflowbase = total(flow_by_length_5), by(mother_zip);
drop if totaltotflowbase == 0;

egen zip_rmonth = group(mother_zip event_month);

egen month_by_year = group(event_month event_year);

xi i.birth_quarter i.month_by_year;

foreach weather in max_temp windspeed humidS {;
	gen double `weather'sq = `weather'^2;
	gen double `weather'cu = `weather'^3/1000;
	gen float `weather'quart = `weather'^4/10000;
	gen float `weather'quint = `weather'^5/100000;
};

foreach weather in rain days_with_rain days_with_fog {;
	gen double `weather'sq = `weather'^2;
	gen double `weather'cu = `weather'^3;
	gen float `weather'quart = `weather'^4;
	gen float `weather'quint = `weather'^5;
};

** Instrumentation involves both total flow AND weather interactions – this code generates said interactions;
foreach weathervar in $weather_linear $weather_quadratic $weather_cubic {;
	gen double tot_flowIV_`weathervar' = tot_flow_base*`weathervar';
	gen double flowIV_5_`weathervar'=(flow_by_length_5)*`weathervar';
	gen double flowIV_10_`weathervar'=(flow_by_length_10)*`weathervar';
	gen double flowIV_15_`weathervar'=(flow_by_length_15)*`weathervar';
	gen double flowIV_20_`weathervar'=(flow_by_length_20)*`weathervar';
	format flowIV_*`weathervar' %16.9g;	
};

qui areg died 

_I*

i.premature##c.flow_by_length_5##(

	c.max_temp
	c.humidS
	c.windspeed
	c.rain
	c.days_with_rain
	c.days_with_fog 

	c.max_tempsq
	c.humidSsq
	c.windspeedsq
	c.rainsq

	c.max_tempcu
	c.humidScu
	c.windspeedcu
	c.raincu

	)

$triweekly_co $triweekly_pm10 

male black asian other_race hisp HS_grad college_grad twins trip_or_more age19_25 age26_30 age31_35 age36up medicaid care_first_tri low_weight second_born third_born fourth_or_more

$agespline [fw = weight], absorb(zip_rmonth) cluster(mother_zip);

di "premature";

margin_splits flow_by_length_5 premature;



** Low BW;
** Run regression prep;
qui do `fileloc'/all_regression_prep_hazard_collapsed.do `fileloc' _low_weight;

** Make coefficients easier to read;
replace died = died * 1000;

*** Use only traffic within 5 miles;
egen totaltotflowbase = total(flow_by_length_5), by(mother_zip);
drop if totaltotflowbase == 0;

egen zip_rmonth = group(mother_zip event_month);

egen month_by_year = group(event_month event_year);

xi i.birth_quarter i.month_by_year;

foreach weather in max_temp windspeed humidS {;
	gen double `weather'sq = `weather'^2;
	gen double `weather'cu = `weather'^3/1000;
	gen float `weather'quart = `weather'^4/10000;
	gen float `weather'quint = `weather'^5/100000;
};

foreach weather in rain days_with_rain days_with_fog {;
	gen double `weather'sq = `weather'^2;
	gen double `weather'cu = `weather'^3;
	gen float `weather'quart = `weather'^4;
	gen float `weather'quint = `weather'^5;
};

** Instrumentation involves both total flow AND weather interactions – this code generates said interactions;
foreach weathervar in $weather_linear $weather_quadratic $weather_cubic {;
	gen double tot_flowIV_`weathervar' = tot_flow_base*`weathervar';
	gen double flowIV_5_`weathervar'=(flow_by_length_5)*`weathervar';
	gen double flowIV_10_`weathervar'=(flow_by_length_10)*`weathervar';
	gen double flowIV_15_`weathervar'=(flow_by_length_15)*`weathervar';
	gen double flowIV_20_`weathervar'=(flow_by_length_20)*`weathervar';
	format flowIV_*`weathervar' %16.9g;	
};

qui areg died 

_I*

i.low_weight##c.flow_by_length_5##(

	c.max_temp
	c.humidS
	c.windspeed
	c.rain
	c.days_with_rain
	c.days_with_fog 

	c.max_tempsq
	c.humidSsq
	c.windspeedsq
	c.rainsq

	c.max_tempcu
	c.humidScu
	c.windspeedcu
	c.raincu

	)

$triweekly_co $triweekly_pm10 

male black asian other_race hisp HS_grad college_grad twins trip_or_more age19_25 age26_30 age31_35 age36up medicaid care_first_tri premature second_born third_born fourth_or_more

$agespline [fw = weight], absorb(zip_rmonth) cluster(mother_zip);

di "low_weight";

margin_splits flow_by_length_5 low_weight;

esttab using `fileloc'/regs/reduced_form_groups_flowmeasure_5miles.xls, 
	replace
	tab
	mlabels(, title)
	se
	plain
	cells(b(fmt(%9.4f) star) se(fmt(%9.4f))) 
	starlevels(\text{$^{\ast}$} 0.10 \text{$^{\ast\ast}$} 0.05 \text{$^{\ast\ast\ast}$} 0.01)
	stats(test se zips, label("Diff" "Std. Error of Diff" "Zips Used") fmt(%9.6fc %9.6fc %9.0fc));	


log close regressions_collapsed_rf;


	
